import os
import pandas as pd
import h5py
from utils.parquet import save_embeddings_to_parquet,read_embeddings_from_parquet

# Split the full set of image embeddings into two parquet files:
#   I - every image found in the embeddings file
#   R - only the images whose id is listed in the evaluation CSV
df = pd.read_csv('data/merged_table/goods_images_eval_R.csv')

R_images_id = df['image_id'].to_list()
R_goods_id = df['goods_id'].to_list()
# Set copy of the evaluation ids: the loop below tests membership once per
# embedding, and a list lookup would make the whole pass quadratic.
R_images_id_set = set(R_images_id)


data = read_embeddings_from_parquet('fashion_clip_features.parquet',index_col='image_name')

I = {}
R = {}
for key, value in data.items():
    # Keys are parsed as "<goods_id>_<image_id>.<ext>": the first two
    # '_'-separated fields are used, and anything after the first '.' in the
    # second field is dropped.
    name_parts = key.split('_')
    goods_id = str(name_parts[0])
    image_id = str(name_parts[1].split('.')[0])
    record = {
        'image_id': image_id,
        'image_url': value['imgs_url'],
        'goods_id': goods_id,
        'embedding': value['features'],
    }
    I[image_id] = record
    # The ids from the CSV are compared as integers, hence the conversion.
    if int(image_id) in R_images_id_set:
        # Store a separate dict so entries in I and R are never aliased.
        R[image_id] = dict(record)

print(1)  # progress marker: splitting finished, writing outputs next
save_embeddings_to_parquet(I,'outputs/fashion_clip/fashion_clip_features_I.parquet')
save_embeddings_to_parquet(R,'outputs/fashion_clip/fashion_clip_features_R.parquet')